{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### example \n",
    "https://spark.apache.org/docs/latest/sql-data-sources-parquet.html"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "%%classpath add mvn\n",
    "org.apache.spark spark-core_2.11 2.3.1\n",
    "org.apache.spark spark-sql_2.11 2.3.1\n",
    "org.apache.spark spark-hive_2.11 2.3.1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import org.apache.spark.sql.SparkSession\n",
    "val spark = SparkSession.builder()\n",
    "    .master(\"local\")\n",
    "    .appName(\"parquet example\")\n",
    "    .getOrCreate()\n",
    "spark"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "val peopleDF = spark.read.json(\"../resources/data/people.json\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spark.implicits._\n",
    "peopleDF.write.parquet(\"people.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import spark.implicits._\n",
    "val parquetFileDF = spark.read.parquet(\"people.parquet\")\n",
    "parquetFileDF.createOrReplaceTempView(\"parquetFile\")\n",
    "val namesDF = spark.sql(\"SELECT name FROM parquetFile WHERE age BETWEEN 13 AND 19\")\n",
    "namesDF.map(attributes => \"Name: \" + attributes(0)).show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "peopleDF.count"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Scala",
   "language": "scala",
   "name": "scala"
  },
  "language_info": {
   "codemirror_mode": "text/x-scala",
   "file_extension": ".scala",
   "mimetype": "",
   "name": "Scala",
   "nbconverter_exporter": "",
   "version": "2.11.12"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": false,
   "sideBar": false,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": false,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
